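# One-time setup (biocLite has been superseded by BiocManager):
# if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")
# BiocManager::install("phyloseq")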
library(phyloseq)
library(ggplot2)
library(plyr)
library(dplyr)
library(Rmisc)
library(DESeq2)
library(doParallel)
library(vegan)
library(grid)
library(gridExtra)
library(reshape2)
# Import the OTU table (BIOM format) together with its phylogenetic tree,
# parsing taxonomy strings with the Greengenes parser.
# NOTE(review): the BIOM file is given relative to the working directory while
# the tree is an absolute, user-specific Dropbox path -- confirm both resolve
# on the machine running this script.
biom <- import_biom(
  BIOMfilename = "OTU_table.biom",
  treefilename = "~/Dropbox/clado-manuscript/Nephele/PipelineResults_NMEPINZ20QK1/nephele_outputs/tree.tre",
  parseFunction = parse_taxonomy_greengenes
)
# Print a summary of the imported object.
biom
# Read the per-sample metadata (the first CSV column supplies the row names)
# and attach it to the read data held in `biom`.
sam.data <- read.csv("sample.data.csv", header = TRUE, row.names = 1)
# Treat the sampling day as a categorical variable.
sam.data[["Date"]] <- as.factor(sam.data[["Date"]])
# Combined "<Date> <Site>" label for every sample.
sam.data[["DateSite"]] <- paste(sam.data[["Date"]], sam.data[["Site"]])
sample_data(biom) <- sam.data
# Print the attached sample data as a check.
sample_data(biom)
## Sample Data: [52 samples by 6 sample variables]:
## TreatmentGroup Site Date Description
## C178N1 Early North 178 Sample of day 178 at site North 1
## C178P1 Early Point 178 Sample of day 178 at site Point 1
## C185P2 Early Point 185 Sample of day 185 at site Point 2
## C206N2 Late North 206 Sample of day 206 at site North 2
## C206P1 Late Point 206 Sample of day 206 at site Point 1
## C206P2 Late Point 206 Sample of day 206 at site Point 2
## C214P1 Late Point 214 Sample of day 214 at site Point 1
## C214P2 Late Point 214 Sample of day 214 at site Point 2
## C214P3 Late Point 214 Sample of day 214 at site Point 3
## C214S1 Late South 214 Sample of day 214 at site South 1
## C214S2 Late South 214 Sample of day 214 at site South 2
## C214S3 Late South 214 Sample of day 214 at site South 3
## C185P1 Early Point 185 Sample of day 185 at site Point 1
## C185P3 Early Point 185 Sample of day 185 at site Point 3
## C199P3 Late Point 199 Sample of day 199 at site Point 3
## C199S2 Late South 199 Sample of day 199 at site South 2
## C199S3 Late South 199 Sample of day 199 at site South 3
## C206N1 Late North 206 Sample of day 206 at site North 1
## C178P2 Early Point 178 Sample of day 178 at site Point 2
## C199N3 Late North 199 Sample of day 199 at site North 3
## C206S1 Late South 206 Sample of day 206 at site South 1
## C214N3 Late North 214 Sample of day 214 at site North 3
## C199N2 Late North 199 Sample of day 199 at site North 2
## C206N3 Late North 206 Sample of day 206 at site North 3
## C206S2 Late South 206 Sample of day 206 at site South 2
## C199N1 Late North 199 Sample of day 199 at site North 1
## C199P1 Late Point 199 Sample of day 199 at site Point 1
## C199S1 Late South 199 Sample of day 199 at site South 1
## C214N1 Late North 214 Sample of day 214 at site North 1
## C172P1 Early Point 172 Sample of day 172 at site Point 1
## C199P2 Late Point 199 Sample of day 199 at site Point 2
## C172N3 Early North 172 Sample of day 172 at site North 3
## C172S3 Early South 172 Sample of day 172 at site South 3
## C178S2 Early South 178 Sample of day 178 at site South 2
## C178P3 Early Point 178 Sample of day 178 at site Point 3
## C178S3 Early South 178 Sample of day 178 at site South 3
## C172N1 Early North 172 Sample of day 172 at site North 1
## C172S1 Early South 172 Sample of day 172 at site South 1
## C178N3 Early North 178 Sample of day 178 at site North 3
## C185N2 Early North 185 Sample of day 185 at site North 2
## C185N3 Early North 185 Sample of day 185 at site North 3
## C185S3 Early South 185 Sample of day 185 at site South 3
## C214N2 Late North 214 Sample of day 214 at site North 2
## C172P2 Early Point 172 Sample of day 172 at site Point 2
## C185S2 Early South 185 Sample of day 185 at site South 2
## C172P3 Early Point 172 Sample of day 172 at site Point 3
## C185N1 Early North 185 Sample of day 185 at site North 1
## C172N2 Early North 172 Sample of day 172 at site North 2
## C178S1 Early South 178 Sample of day 178 at site South 1
## C185S1 Early South 185 Sample of day 185 at site South 1
## C172S2 Early South 172 Sample of day 172 at site South 2
## C178N2 Early North 178 Sample of day 178 at site North 2
## SampleID.1 DateSite
## C178N1 C178N1 178 North
## C178P1 C178P1 178 Point
## C185P2 C185P2 185 Point
## C206N2 C206N2 206 North
## C206P1 C206P1 206 Point
## C206P2 C206P2 206 Point
## C214P1 C214P1 214 Point
## C214P2 C214P2 214 Point
## C214P3 C214P3 214 Point
## C214S1 C214S1 214 South
## C214S2 C214S2 214 South
## C214S3 C214S3 214 South
## C185P1 C185P1 185 Point
## C185P3 C185P3 185 Point
## C199P3 C199P3 199 Point
## C199S2 C199S2 199 South
## C199S3 C199S3 199 South
## C206N1 C206N1 206 North
## C178P2 C178P2 178 Point
## C199N3 C199N3 199 North
## C206S1 C206S1 206 South
## C214N3 C214N3 214 North
## C199N2 C199N2 199 North
## C206N3 C206N3 206 North
## C206S2 C206S2 206 South
## C199N1 C199N1 199 North
## C199P1 C199P1 199 Point
## C199S1 C199S1 199 South
## C214N1 C214N1 214 North
## C172P1 C172P1 172 Point
## C199P2 C199P2 199 Point
## C172N3 C172N3 172 North
## C172S3 C172S3 172 South
## C178S2 C178S2 178 South
## C178P3 C178P3 178 Point
## C178S3 C178S3 178 South
## C172N1 C172N1 172 North
## C172S1 C172S1 172 South
## C178N3 C178N3 178 North
## C185N2 C185N2 185 North
## C185N3 C185N3 185 North
## C185S3 C185S3 185 South
## C214N2 C214N2 214 North
## C172P2 C172P2 172 Point
## C185S2 C185S2 185 South
## C172P3 C172P3 172 Point
## C185N1 C185N1 185 North
## C172N2 C172N2 172 North
## C178S1 C178S1 178 South
## C185S1 C185S1 185 South
## C172S2 C172S2 172 South
## C178N2 C178N2 178 North
# Normalize by relative abundance: divide each sample's counts by that
# sample's total.
biom.relabund <- transform_sample_counts(
  biom,
  function(counts) counts / sum(counts)
)
# NMDS ordination on Bray-Curtis dissimilarities, k = 3 dimensions.
# NOTE(review): no RNG seed is set before this call, so the run log recorded
# below may not be reproduced exactly on a re-run -- confirm whether that matters.
ordNMDS.k3 <- ordinate(
  biom.relabund,
  method = "NMDS",
  distance = "bray",
  k = 3
)
## Run 0 stress 0.07869986
## Run 1 stress 0.07869949
## ... New best solution
## ... Procrustes: rmse 0.0002305481 max resid 0.001105219
## ... Similar to previous best
## Run 2 stress 0.07869962
## ... Procrustes: rmse 0.0002017883 max resid 0.001019135
## ... Similar to previous best
## Run 3 stress 0.07869844
## ... New best solution
## ... Procrustes: rmse 0.0004553477 max resid 0.001521332
## ... Similar to previous best
## Run 4 stress 0.08334584
## Run 5 stress 0.07874264
## ... Procrustes: rmse 0.002511844 max resid 0.01380174
## Run 6 stress 0.07869964
## ... Procrustes: rmse 0.0005900723 max resid 0.002571066
## ... Similar to previous best
## Run 7 stress 0.08334478
## Run 8 stress 0.07872072
## ... Procrustes: rmse 0.001339293 max resid 0.005271881
## ... Similar to previous best
## Run 9 stress 0.07870018
## ... Procrustes: rmse 0.0006557135 max resid 0.002997354
## ... Similar to previous best
## Run 10 stress 0.07869808
## ... New best solution
## ... Procrustes: rmse 0.0001766944 max resid 0.0005861921
## ... Similar to previous best
## Run 11 stress 0.07869856
## ... Procrustes: rmse 0.0002775179 max resid 0.001302053
## ... Similar to previous best
## Run 12 stress 0.08337886
## Run 13 stress 0.07869809
## ... Procrustes: rmse 0.0001472413 max resid 0.0003891141
## ... Similar to previous best
## Run 14 stress 0.07869902
## ... Procrustes: rmse 0.0003348988 max resid 0.001535
## ... Similar to previous best
## Run 15 stress 0.07869916
## ... Procrustes: rmse 0.0003981774 max resid 0.001889146
## ... Similar to previous best
## Run 16 stress 0.08334287
## Run 17 stress 0.08804081
## Run 18 stress 0.07869856
## ... Procrustes: rmse 0.0002547032 max resid 0.001142475
## ... Similar to previous best
## Run 19 stress 0.08695913
## Run 20 stress 0.07906707
## ... Procrustes: rmse 0.00900781 max resid 0.03814892
## *** Solution reached
# Plot 1: ordination with samples shaped by Site and coloured by Date,
# small points, hue-based colour scale.
ord.k3 <- plot_ordination(
  biom.relabund,
  ordNMDS.k3,
  color = "Date",
  shape = "Site"
) +
  geom_point(size = 2)
ord.k3 +
  scale_colour_hue(h = c(300, 500)) +
  theme_bw()

# Plot 2: same ordination with large points; a white and then a black point
# are drawn on top of each large coloured point.
ord.k3 <- plot_ordination(
  biom.relabund,
  ordNMDS.k3,
  color = "Date",
  shape = "Site"
) +
  geom_point(size = 5)
ord.k3 +
  scale_colour_hue(h = c(300, 500)) +
  geom_point(color = "white", size = 3) +
  geom_point(color = "black", size = 1) +
  theme_bw()

# Plot 3: grey-scale version of plot 2. `ord.k3` (large-point version built
# just above) is reused as-is; only the colour scale differs.
ord.k3 +
  scale_colour_manual(
    values = c("grey20", "grey30", "grey40", "grey50", "grey60", "grey70")
  ) +
  geom_point(color = "white", size = 3) +
  geom_point(color = "black", size = 1) +
  theme_bw()


# PERMANOVA: test Date, Site and their interaction against Bray-Curtis
# distances between samples.
# NOTE(review): the distances are computed from `biom` (untransformed counts),
# whereas the ordination earlier in this script uses `biom.relabund` --
# confirm this difference is intended.
df <- as(sample_data(biom), "data.frame")
d <- phyloseq::distance(biom, method = "bray")
clado.adonis <- adonis(formula = d ~ Date * Site, data = df)
clado.adonis
##
## Call:
## adonis(formula = d ~ Date * Site, data = df)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Date 5 3.1945 0.63890 17.4143 0.41338 0.001 ***
## Site 2 1.5561 0.77804 21.2068 0.20136 0.001 ***
## Date:Site 10 1.7298 0.17298 4.7148 0.22384 0.001 ***
## Residuals 34 1.2474 0.03669 0.16142
## Total 51 7.7278 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Alpha-diversity estimates for every sample (the recorded output below lists
# Observed, Chao1, ACE, Shannon, Simpson, InvSimpson and Fisher), joined to the
# sample metadata on the sample ID.
biom.rich.est <- estimate_richness(biom, measures = NULL)
biom.rich.est[["SampleID.1"]] <- rownames(biom.rich.est)
biom.rich.est <- merge(x = biom.rich.est, y = sam.data, by = "SampleID.1")
# Date was made a factor in sam.data; go through character so the numeric
# value is the day label rather than the factor level code.
biom.rich.est[["Date"]] <- as.numeric(as.character(biom.rich.est[["Date"]]))
head(biom.rich.est)
## SampleID.1 Observed Chao1 se.chao1 ACE se.ACE Shannon
## 1 C172N1 5139 6770.782 99.67941 7316.921 48.50633 5.444561
## 2 C172N2 5236 7017.650 107.98761 7420.840 47.98329 5.649933
## 3 C172N3 6890 8499.605 90.45120 9053.801 49.75824 5.980043
## 4 C172P1 2936 4855.835 138.81016 5427.422 47.24285 4.851552
## 5 C172P2 4833 7043.680 130.90171 7619.611 51.59008 5.306940
## 6 C172P3 3750 5099.247 96.43281 5368.421 41.19882 4.824680
## Simpson InvSimpson Fisher TreatmentGroup Site Date
## 1 0.9874631 79.76426 896.0810 Early North 172
## 2 0.9892749 93.23962 1001.0057 Early North 172
## 3 0.9899773 99.77399 1361.3171 Early North 172
## 4 0.9767986 43.10086 520.1978 Early Point 172
## 5 0.9803746 50.95444 904.5831 Early Point 172
## 6 0.9400545 16.68183 655.5148 Early Point 172
## Description DateSite
## 1 Sample of day 172 at site North 1 172 North
## 2 Sample of day 172 at site North 2 172 North
## 3 Sample of day 172 at site North 3 172 North
## 4 Sample of day 172 at site Point 1 172 Point
## 5 Sample of day 172 at site Point 2 172 Point
## 6 Sample of day 172 at site Point 3 172 Point
# Three-colour grey palette used for Site in the manual colour scales.
cbPalette <- c("#b5b5b5", "#777777", "#212121")
# Plot observed richness: mean per Date x Site with +/- SE error bars.
biom.rich.est.obs <- summarySE(
  data = biom.rich.est,
  measurevar = "Observed",
  groupvars = c("Date", "Site")
)
p.obs <- ggplot(
  biom.rich.est.obs,
  aes(x = Date, y = Observed, colour = Site)
) +
  geom_point() +
  geom_errorbar(aes(ymin = Observed - se, ymax = Observed + se)) +
  geom_line() +
  scale_colour_manual(values = cbPalette)
# Alternative colour scale, kept for reference:
# scale_colour_hue(h = c(400, 120))
p.obs +
  theme_bw() +
  theme(
    axis.text.x = element_text(size = 10, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 10)
  )

# Plot Shannon diversity index: mean per Date x Site with +/- SE error bars,
# hue-based colour scale.
biom.rich.est.sha <- summarySE(
  data = biom.rich.est,
  measurevar = "Shannon",
  groupvars = c("Date", "Site")
)
p.sha <- ggplot(
  biom.rich.est.sha,
  aes(x = Date, y = Shannon, colour = Site)
) +
  geom_point() +
  geom_errorbar(aes(ymin = Shannon - se, ymax = Shannon + se)) +
  geom_line() +
  scale_colour_hue(h = c(400, 120))
p.sha +
  theme_bw() +
  theme(
    axis.text.x = element_text(size = 10, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 10)
  )

# Plot Shannon diversity index again, this time with the grey cbPalette
# colours instead of the hue-based scale.
biom.rich.est.sha <- summarySE(
  data = biom.rich.est,
  measurevar = "Shannon",
  groupvars = c("Date", "Site")
)
p.sha <- ggplot(
  biom.rich.est.sha,
  aes(x = Date, y = Shannon, colour = Site)
) +
  geom_point() +
  geom_errorbar(aes(ymin = Shannon - se, ymax = Shannon + se)) +
  geom_line() +
  scale_colour_manual(values = cbPalette)
p.sha +
  theme_bw() +
  theme(
    axis.text.x = element_text(size = 10, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 10)
  )

# Find the top 30 genera (ranked by relative abundance summed over all taxa
# assigned to the genus) and subset biom.relabund to them.
sort.genera <- sort(
  tapply(taxa_sums(biom.relabund), tax_table(biom.relabund)[, "Genus"], sum),
  decreasing = TRUE
)
# head() instead of [1:30]: when fewer than 30 genera exist, [1:30] pads the
# result with NA entries, and an NA name would make the %in% filter below also
# select taxa whose Genus is NA.
top.genera <- head(sort.genera, 30)
top.genera.list <- names(top.genera)
biom.relabund.subset <- subset_taxa(biom.relabund, Genus %in% top.genera.list)
# The same Genus filter used to be applied a second time to the already
# filtered object, which selects nothing further; keep the second name as an
# alias because later code refers to it.
biom.relabund.subset.taxa <- biom.relabund.subset
biom.relabund.subset.taxa
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 9093 taxa and 52 samples ]
## sample_data() Sample Data: [ 52 samples by 6 sample variables ]
## tax_table() Taxonomy Table: [ 9093 taxa by 8 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 9093 tips and 9089 internal nodes ]
# Melt the top-genera subset to a long data frame and total the relative
# abundance of each genus within each sample.
relabund.top.genera <- psmelt(biom.relabund.subset.taxa)
relabund.top.genera.genus <- relabund.top.genera %>%
  group_by(Sample, Genus) %>%
  # Per-sample, per-genus total, repeated on every row of the group.
  mutate(GenusAbundance = sum(Abundance)) %>%
  # Collapse the repeated rows to the distinct combinations of these columns.
  distinct(
    Sample, GenusAbundance, TreatmentGroup, Site, Date, Phylum, Family, Genus
  ) %>%
  # Drop the Sample/Genus grouping so it does not silently carry over into
  # later operations on this table.
  ungroup()
head(relabund.top.genera.genus)
## Source: local data frame [6 x 10]
## Groups: Sample, Genus [6]
##
## Sample GenusAbundance TreatmentGroup Site Date Phylum
## <chr> <dbl> <fctr> <fctr> <fctr> <fctr>
## 1 C178S2 0.14728018 Early South 178 Proteobacteria
## 2 C206N2 0.07541493 Late North 206 [Thermi]
## 3 C199S1 0.12408425 Late South 199 Proteobacteria
## 4 C185S1 0.12728636 Early South 185 Proteobacteria
## 5 C199S3 0.10486769 Late South 199 Proteobacteria
## 6 C172S1 0.10894857 Early South 172 Bacteroidetes
## # ... with 4 more variables: Family <fctr>, Genus <fctr>, Sample <chr>,
## # Genus <fctr>
# Summarise genus abundance of the top 30 genera for every
# Site x Date x Genus combination (N, mean, sd, se and ci columns, as shown in
# the recorded output below).
relabund.top.genera.genus.est <- summarySE(
  data = relabund.top.genera.genus,
  measurevar = "GenusAbundance",
  groupvars = c("Site", "Date", "Genus")
)
head(relabund.top.genera.genus.est)
## Site Date Genus N GenusAbundance sd se
## 1 North 172 Armatimonas 3 0.0082179013 0.0037601213 0.0021709070
## 2 North 172 Bdellovibrio 3 0.0023540936 0.0001963925 0.0001133873
## 3 North 172 Cellvibrio 3 0.0058399987 0.0066066072 0.0038143264
## 4 North 172 CM44 3 0.0088329518 0.0005298073 0.0003058844
## 5 North 172 Crenothrix 3 0.0008092387 0.0004823472 0.0002784833
## 6 North 172 Dechloromonas 3 0.0019270556 0.0026318679 0.0015195096
## ci
## 1 0.0093406591
## 2 0.0004878661
## 3 0.0164117220
## 4 0.0013161143
## 5 0.0011982168
## 6 0.0065379223
# Convert Date to a number by way of character, so that if Date is a factor
# its labels (not its level codes) become the x values.
relabund.top.genera.genus.est[["Date"]] <- as.numeric(
  as.character(relabund.top.genera.genus.est[["Date"]])
)
# Plot summary of genus abundance of top 30 genera: one panel per genus,
# mean abundance over Date by Site with +/- SE error bars.
p <- ggplot(
  relabund.top.genera.genus.est,
  aes(x = Date, y = GenusAbundance, colour = Site)
) +
  geom_point() +
  geom_errorbar(aes(ymin = GenusAbundance - se, ymax = GenusAbundance + se)) +
  geom_line() +
  facet_wrap(~Genus, ncol = 3, scales = "free_y") +
  scale_colour_hue(h = c(400, 120))
p +
  theme_bw() +
  theme(
    axis.text.x = element_text(size = 10, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 10)
  )

# Find methanotrophic bacteria by genus: read the genus names of interest
# (first column of the text file) and keep only taxa assigned to them.
methanolist <- read.table("taxa-of-interest/methanos.txt")
methanolist <- as.vector(methanolist[["V1"]])
biom.relabund.methanos <- subset_taxa(
  biom.relabund,
  Genus %in% as.factor(methanolist)
)
biom.relabund.methanos
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 567 taxa and 52 samples ]
## sample_data() Sample Data: [ 52 samples by 6 sample variables ]
## tax_table() Taxonomy Table: [ 567 taxa by 8 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 567 tips and 566 internal nodes ]
# Melt the methanotroph subset to a long data frame, total the relative
# abundance of each genus within each sample, then summarise per
# Site x Date x Genus.
relabund.methanos <- psmelt(biom.relabund.methanos)
relabund.methanos.genus <- relabund.methanos %>%
  group_by(Sample, Genus) %>%
  # Per-sample, per-genus total, repeated on every row of the group.
  mutate(GenusAbundance = sum(Abundance)) %>%
  # Collapse the repeated rows to the distinct combinations of these columns.
  distinct(
    Sample, GenusAbundance, TreatmentGroup, Site, Date, Phylum, Family, Genus
  ) %>%
  # Drop the Sample/Genus grouping so it does not silently carry over into
  # later operations on this table.
  ungroup()
relabund.methanos.genus.est <- summarySE(
  relabund.methanos.genus,
  measurevar = "GenusAbundance",
  groupvars = c("Site", "Date", "Genus")
)
head(relabund.methanos.genus.est)
## Site Date Genus N GenusAbundance sd se
## 1 North 172 Crenothrix 3 8.092387e-04 4.823472e-04 2.784833e-04
## 2 North 172 Methylibium 3 6.600147e-04 3.800437e-04 2.194183e-04
## 3 North 172 Methylocaldum 3 7.717447e-05 5.988775e-05 3.457621e-05
## 4 North 172 Methylomicrobium 3 0.000000e+00 0.000000e+00 0.000000e+00
## 5 North 172 Methylomonas 3 1.097031e-04 2.736128e-06 1.579704e-06
## 6 North 172 Methylosinus 3 4.677898e-05 2.432451e-05 1.404376e-05
## ci
## 1 1.198217e-03
## 2 9.440809e-04
## 3 1.487694e-04
## 4 0.000000e+00
## 5 6.796919e-06
## 6 6.042543e-05
# Convert Date to a number by way of character, so that if Date is a factor
# its labels (not its level codes) become the x values.
relabund.methanos.genus.est[["Date"]] <- as.numeric(
  as.character(relabund.methanos.genus.est[["Date"]])
)
# Plot summary of genus abundance of methanotrophic genera: one panel per
# genus, mean abundance over Date by Site with +/- SE error bars.
p <- ggplot(
  relabund.methanos.genus.est,
  aes(x = Date, y = GenusAbundance, colour = Site)
) +
  geom_point() +
  geom_errorbar(aes(ymin = GenusAbundance - se, ymax = GenusAbundance + se)) +
  geom_line() +
  facet_wrap(~Genus, ncol = 3, scales = "free_y") +
  scale_colour_hue(h = c(400, 120))
p +
  theme_bw() +
  theme(
    axis.text.x = element_text(size = 10, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 10)
  )

# Find all genera, total the relative abundance of each genus within each
# sample, then summarise per Site x Date x Genus.
all.genera <- sort(get_taxa_unique(biom.relabund, "Genus"), decreasing = FALSE)
biom.relabund.all.genera <- subset_taxa(
  biom.relabund,
  Genus %in% as.factor(all.genera)
)
# From here on this name holds the melted long data frame rather than the
# phyloseq object it held on the previous line.
biom.relabund.all.genera <- psmelt(biom.relabund.all.genera)
biom.relabund.all.genera.genus <- biom.relabund.all.genera %>%
  group_by(Sample, Genus) %>%
  # Per-sample, per-genus total, repeated on every row of the group.
  mutate(GenusAbundance = sum(Abundance)) %>%
  # Collapse the repeated rows to the distinct combinations of these columns.
  distinct(Sample, GenusAbundance, TreatmentGroup, Site, Date, Family, Genus) %>%
  # Drop the Sample/Genus grouping so it does not silently carry over into
  # later operations on this table.
  ungroup()
biom.relabund.all.genera.genus.est <- summarySE(
  biom.relabund.all.genera.genus,
  measurevar = "GenusAbundance",
  groupvars = c("Site", "Date", "Genus")
)
head(biom.relabund.all.genera.genus.est)
## Site Date Genus N GenusAbundance sd se
## 1 North 172 A17 3 1.044934e-05 7.240333e-06 4.180208e-06
## 2 North 172 Achromobacter 3 1.205785e-06 2.088482e-06 1.205785e-06
## 3 North 172 Acidaminobacter 3 2.030206e-05 3.516421e-05 2.030206e-05
## 4 North 172 Acidocella 3 0.000000e+00 0.000000e+00 0.000000e+00
## 5 North 172 Acidovorax 3 4.023211e-05 3.508560e-05 2.025668e-05
## 6 North 172 Acinetobacter 3 1.395043e-04 1.803168e-04 1.041059e-04
## ci
## 1 1.798598e-05
## 2 5.188076e-06
## 3 8.735273e-05
## 4 0.000000e+00
## 5 8.715747e-05
## 6 4.479317e-04
# Convert Date to a number by way of character, so that if Date is a factor
# its labels (not its level codes) become the x values.
biom.relabund.all.genera.genus.est[["Date"]] <- as.numeric(
  as.character(biom.relabund.all.genera.genus.est[["Date"]])
)
# Plot summary of genus abundance for all genera: one panel per genus, mean
# abundance over Date by Site with +/- SE error bars.
p <- ggplot(
  biom.relabund.all.genera.genus.est,
  aes(x = Date, y = GenusAbundance, colour = Site)
) +
  geom_point() +
  geom_errorbar(aes(ymin = GenusAbundance - se, ymax = GenusAbundance + se)) +
  geom_line() +
  facet_wrap(~Genus, ncol = 3, scales = "free_y") +
  scale_colour_hue(h = c(400, 120))
p +
  theme_bw() +
  theme(
    axis.text.x = element_text(size = 10, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 10)
  )
# (Rendered plot output omitted.)